#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <time.h>
#include "../base.h"

/* TCP port the server binds to and listens on. */
#define PORT 23240

/*
 * RAND_MAX converted to float, so that rand()/rand_max is a floating-point
 * division.  The expansion is fully parenthesized so the cast cannot bind to
 * neighbouring tokens at the point of use.
 */
#define rand_max ((float)RAND_MAX)
/* Pseudo-random float in the closed range [-1, 1], derived from rand(). */
#define r() ((rand()/rand_max)*2-1)

/*
 * Accumulate the product of two 4x4 blocks into a third: T += A x B.
 *
 * A, B and T each point at the top-left element of a 4x4 block stored
 * row-major inside a larger array whose rows are n floats apart, so element
 * (row, col) of a block lives at index row*n + col.
 *
 * The existing contents of T are added to, not overwritten; callers that want
 * a plain product must zero the target block first.
 */
void matrix_mult_4x4(const float *A, const float *B, float *T, int n)
{
	for (int row = 0; row < 4; row++) {
		const float *a_row = A + row*n;
		float *t_row = T + row*n;

		for (int col = 0; col < 4; col++) {
			/*
			 * Sum the four products left to right and add the
			 * total to T in a single step.
			 */
			float dot = a_row[0]*B[col];

			for (int k = 1; k < 4; k++) {
				dot += a_row[k]*B[k*n + col];
			}
			t_row[col] += dot;
		}
	}
}


/*
 * Upper bound on the matrix dimension N accepted from a client.  This is a
 * sanity limit chosen for this server, not part of the wire protocol: it keeps
 * N*N far from integer overflow and caps the memory one request can claim
 * (three N*N float buffers, i.e. 48 MiB at the limit with 4-byte floats).
 */
enum { MAX_MATRIX_DIM = 2048 };

/*
 * Compute T += A x B for N x N row-major matrices by tiling them into 4x4
 * blocks and handing every block triple to matrix_mult_4x4().
 *
 * N must be a positive multiple of 4.  T must already hold the desired
 * starting values (zeros for a plain product) because the kernel accumulates
 * into it.
 */
static void multiply_blocked(const float *A, const float *B, float *T, int N)
{
	const size_t stride = (size_t)N;
	const size_t blocks = stride / 4;

	for (size_t i = 0; i < blocks; i++) {
		for (size_t j = 0; j < blocks; j++) {
			for (size_t k = 0; k < blocks; k++) {
				matrix_mult_4x4(&A[4*stride*i + 4*k],
						&B[4*stride*k + 4*j],
						&T[4*stride*i + 4*j], N);
			}
		}
	}
}

/*
 * Handle one client connection and close it.
 *
 * Exchange, all values in the host's native representation:
 *   server -> client : "GRANT SERVICE" including its terminating NUL (14 bytes)
 *   client -> server : int N, then N*N floats for A, then N*N floats for B
 *   server -> client : double execution time, then N*N floats of A x B
 *
 * Takes ownership of connfd: the descriptor is closed on every path.
 * Returns 0 when the full exchange succeeded, -1 otherwise (the reason is
 * reported on stderr and the connection is simply dropped).
 */
static int serve_client(int connfd)
{
	static const char greeting[] = "GRANT SERVICE";
	FILE *in;
	FILE *out = NULL;
	float *A = NULL, *B = NULL, *T = NULL;
	struct timespec start, stop;
	double exec_time;
	size_t count;
	int outfd;
	int N;
	int rc = -1;

	in = fdopen(connfd, "r");
	if (in == NULL) {
		perror("fdopen");
		close(connfd);
		return -1;
	}

	/*
	 * Use a separate write stream on a duplicated descriptor.  A single
	 * "r+" stream may not switch between output and input without an
	 * intervening flush or positioning call, and a socket cannot seek.
	 */
	outfd = dup(connfd);
	if (outfd == -1) {
		perror("dup");
		goto done;
	}
	out = fdopen(outfd, "w");
	if (out == NULL) {
		perror("fdopen");
		close(outfd);
		goto done;
	}

	/*
	 * sizeof(greeting) counts the terminating NUL, giving the 14 bytes the
	 * protocol sends.  Flush right away so the greeting is on the wire
	 * before this function blocks reading the request.
	 */
	if (fwrite(greeting, sizeof(char), sizeof(greeting), out) != sizeof(greeting)
	    || fflush(out) == EOF) {
		perror("send greeting");
		goto done;
	}

	/* Get size of matrix */
	if (fread(&N, sizeof(N), 1, in) != 1) {
		fprintf(stderr, "client closed before sending matrix size\n");
		goto done;
	}
	/*
	 * N is untrusted input.  The 4x4 block kernel cannot cover a dimension
	 * that is not a multiple of 4, and non-positive or huge values would
	 * produce invalid buffer sizes.
	 */
	if (N <= 0 || N > MAX_MATRIX_DIM || N % 4 != 0) {
		fprintf(stderr, "rejecting matrix size %d\n", N);
		goto done;
	}

	/* Heap buffers: the size is client-controlled, so keep it off the stack. */
	count = (size_t)N * (size_t)N;
	A = malloc(count * sizeof(*A));
	B = malloc(count * sizeof(*B));
	/* The kernel accumulates into T, so it has to start out as all zeros. */
	T = calloc(count, sizeof(*T));
	if (A == NULL || B == NULL || T == NULL) {
		fprintf(stderr, "out of memory for %dx%d matrices\n", N, N);
		goto done;
	}

	/* Receive matrices */
	if (fread(A, sizeof(*A), count, in) != count
	    || fread(B, sizeof(*B), count, in) != count) {
		fprintf(stderr, "short read while receiving matrices\n");
		goto done;
	}

	/* Only the multiplication itself is timed. */
	clock_gettime(CLOCK_MONOTONIC, &start);
	multiply_blocked(A, B, T, N);
	clock_gettime(CLOCK_MONOTONIC, &stop);

	exec_time = calc_diff_time(&start, &stop);
	/*
	 * NOTE(review): calc_diff_time() comes from base.h; presumably it
	 * returns nanoseconds, which makes this value microseconds -- confirm.
	 */
	exec_time = exec_time/1000; /* us */

	if (fwrite(&exec_time, sizeof(exec_time), 1, out) != 1
	    || fwrite(T, sizeof(*T), count, out) != count) {
		perror("send result");
		goto done;
	}
#if 0
	for (int i=0; i<N; i++) {
		for (int j=0; j<N; j++) {
			printf("%f\t", T[N*i+j]);
		}
		printf("\n");
	}
#endif

	rc = 0;

done:
	free(T);
	free(B);
	free(A);
	/* fclose() flushes pending output; a failure here means data was lost. */
	if (out != NULL && fclose(out) == EOF) {
		perror("fclose");
		rc = -1;
	}
	/*
	 * fclose() also closes the underlying descriptor, so connfd must not
	 * be passed to close() again afterwards.
	 */
	fclose(in);
	return rc;
}

/*
 * Matrix-multiplication server: listens on PORT on all interfaces and serves
 * clients one at a time, forever.  See serve_client() for the exchange.
 *
 * NOTE(review): system_error() is declared in base.h; presumably it reports
 * errno and terminates.  The explicit return/continue statements after it
 * cover the case where it returns.
 */
int main(void)
{
	struct sockaddr_in serv_addr;
	int optval = 1;
	int sockfd;

	sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd == -1) {
		system_error();
		return EXIT_FAILURE;
	}

	memset(&serv_addr, 0, sizeof(serv_addr));
	serv_addr.sin_family = AF_INET;
	serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	serv_addr.sin_port = htons(PORT);

	/* Best effort: lets the server rebind quickly after a restart. */
	if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) {
		perror("setsockopt(SO_REUSEADDR)");
	}

	if (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) == -1) {
		system_error();
		return EXIT_FAILURE;
	}

	if (listen(sockfd, 5) == -1) {
		system_error();
		return EXIT_FAILURE;
	}

	while (1) {
		struct sockaddr_in cli_addr;
		/* accept() requires the buffer size on input, not zero. */
		socklen_t clilen = sizeof(cli_addr);
		int newsockfd;

		newsockfd = accept(sockfd, (struct sockaddr*)&cli_addr, &clilen);
		if (newsockfd == -1) {
			system_error();
			continue;
		}

		/* Failures are reported inside; keep serving other clients. */
		(void)serve_client(newsockfd);
	}

	/* Not reached: the accept loop above never terminates. */
	close(sockfd);

	return 0;
}

